/*
This software module was originally developed by 
        Hang-Seop Lee (hslee@etri.re.kr), ETRI
        Jung-Chul Lee (jclee@etri.re.kr), ETRI
and edited by Hang-Seop Lee, Jung-Chul Lee of ETRI, 
in the course of development of the MPEG-4.
This software module is an implementation of a part of one or
more MPEG-4 tools as specified by the MPEG-4.
ISO/IEC gives users of the MPEG-4 free license to this                
software module or modifications thereof for use in hardware
or software products claiming conformance to the MPEG-4.
Those intending to use this software module in hardware or software
products are advised that its use may infringe existing patents.
The original developer of this software module and his/her company,
the subsequent editors and their companies, and ISO/IEC have no
liability for use of this software module or modifications thereof
in an implementation.
Copyright is not released for non MPEG-4 conforming
products. ETRI retains full right to use the code for his/her own
purpose, assign or donate the code to a third party and to
inhibit third parties from using the code for non
MPEG-4 conforming products.
This copyright notice must be included in all copies or
derivative works. Copyright (c) 1997.
*/    

#include "stdafx.h"
#include "Modeless.h"
#include "ModelessDlg.h"
#include "ModelessOptDlg.h"
#include "LipSyncDlg.h"
#include "ModelessTextDlg.h"
#include "tts-structure.h"
#include "tts-bitstream.h"

// Slider kinds accepted by InitSliderBar(): TEXT_SYNC activates the
// text-sync slider, any other value activates the video-sync slider.
#define	TEXT_SYNC	(int)0
#define	VIDEO_SYNC	(int)1

// Dialog objects defined in other modules; this file only drives their
// controls through these pointers.
extern class CLipSyncDlg* pCLip;	// ChangeBitmap() is called on it to show a lip shape
extern class CModelessDlg* ppp;	// owns both position sliders and the play/stop/backward/forward controls
extern class CModelessOptDlg* pOpt;	// owns the "default" check box, speech-rate slider and sync radio button
extern class CModelessTextDlg* pText;	// owns the sentence edit box used for captions

extern int	nFILE;		// for Caption On/Off
extern int	nS_MAX;		// defined in ModelessOptDlg.cpp
// f_CURPOS_F is compared against the sentence index in MPEG4_TTS_Play();
// f_SPCHRATE overrides a sentence's speech rate there.
extern float	f_CURPOS_F, f_SPCHRATE;
extern int	nFLAG_TRICK;	// set from Tts->T_en by MPEG4_Init()

/******************** TTS Bit-Stream ********************/ 

// TTS_seq is the sequence start code checked by Open_File().
// NOTE(review): rd_txt() declares its own local TTS_sent, Sdur and Nphone,
// so the externs of the same names are not the ones it uses.
extern int TTS_seq, TTS_sent;
extern short TTSinputType;
extern char TTSCmmd, Silence, Gender, Age, Speech_Rate, Niframe;
extern char Text[500], Phone[1000], Ptch[1000][3], Energy[1000][3], Lshape[1000];
extern short Sdur, Nphone, Dur[1000], SntDur, PinSnt, Offset, LinSnt[1000];
// v_st is written by Set_start_time(), v_ed by MPEG4_Init(); both are
// multiples of 500.
extern int v_st,v_ed,sp_st, sp_ed;

/* Bit assignment */

// Field widths in bits; this file passes them as the nbits argument of the
// *data_read() functions.
extern int BL_code, BP_en, BV_en, BL_en, BT_en;
extern int BSilence, BSdur;
extern int BTTSCmmd, BGender, BAge, BSpeech_Rate;
extern int BNtext, BText;
extern int BNphone, BPhone, BDur, BPtch, BEnergy;
extern int BSntDur, BPinSnt, BOffset;
extern int BNlip, BLinSnt, BLshape;

/******************** TTS Bit-Stream ********************/

// Audio output and synthesis entry points implemented in other modules.
extern BOOL SoundOutInit();
extern BOOL SoundOutUninit();
extern void VSayOneParagraph(TTS_Data *Tts, FRAME_Data *Frame, int *T_Sync);
extern void	play(int nsp);

// Shared sample buffer; MPEG4_TTS_Play() zero-fills iwave[] and then calls
// play() to output silence.
extern union SYN { short iwave[1024*32]; unsigned char buf[1024*64]; } syn;

// Forward declarations of the functions defined in this file.
FILE *Open_File();                                      // open the stream and read the sequence header
void Init_Data();                                       // allocate Tts and Frame
void InitSliderBar(int nKind_Slider);                   // TEXT_SYNC or VIDEO_SYNC
void Set_start_time(FILE *fp, int nPOS_MOV);            // skip nPOS_MOV frames
int rd_txt(FILE *fp, TTS_Data *Tts, FRAME_Data *Frame); // read one frame; 0 when no frame start code
char **cmatrix(int nrh, int nch);                       // (nrh+1) rows of (nch+1) chars
void free_cmatrix(char **m, int nrh, int nch);
// Bit-level readers: read n values of nbits bits each, most significant bit
// first.  B is the two-byte reader state: B[0] is the current byte and B[1]
// the number of bits of it not yet consumed.
void Idata_read(int *A, int n, int nbits, unsigned char *B, FILE *fp);
void Sdata_read(short *A, int n, int nbits, unsigned char *B, FILE *fp);
void Cdata_read(char *A, int n, int nbits, unsigned char *B, FILE *fp);
void CMdata_read(char **A, int n1, int n2, int nbits, unsigned char *B, FILE *fp);
void free_Data(FRAME_Data *Frame);                      // release what rd_txt() allocated
void Set_Slider_Pos(int *T_Sync);
int ChangeLip( FRAME_Data *Frame );

int IsVideo;    // copy of Tts->V_en, set by MPEG4_Init()
int T_Sync[3];
// T_Sync[0] == current time
// T_Sync[1] == total time
// T_Sync[2] == start time of Sentence
// NOTE(review): T_Sync[1] is only assigned for video streams (MPEG4_Init);
// T_Sync[2] is never written in this file.

// Sequence header and the frame currently being decoded; both are allocated
// by Init_Data().
TTS_Data *Tts;
FRAME_Data *Frame;


// Scratch pointers set by rd_txt() to the parts of the frame it has just
// allocated.
SENTENCE_Data *Snt;
Prosody_Data *Prosody;
Video_Data *Video;
Lip_Data *Lip;

// Bit-reader state shared by Open_File() and rd_txt(): YY[0] is the current
// byte, YY[1] the number of its bits still unread.
unsigned char YY[2];

/* ------ Open_File() ------ */
//
// Opens the bit-stream file "mpeg_tts.dat" and reads the sequence header
// into the global *Tts.
//
// The header is a 32-bit start code that must equal TTS_seq, followed by
// the sequence id, the language code and the seven enable flags
// (G, A, R, P, V, L, T).  Any bits left over in the last header byte are
// dropped, so the next read starts with a fresh byte.
//
// Returns the open stream positioned just after the header; the caller
// must fclose() it.  The process exits with status 1 if the file cannot
// be opened or the start code does not match.
//
FILE *Open_File()
{
    int startCode;
    FILE *fp;

    YY[0]=0; YY[1]=0;
    if((fp=fopen("mpeg_tts.dat","rb"))==NULL) {
        AfxMessageBox("Cannot open mpeg_tts.dat");
        exit(1);
    }
    Idata_read(&startCode,1,32,YY,fp);
    if(startCode!=TTS_seq) {
        fclose(fp);
        AfxMessageBox("Err in TTS_seq"); exit(1); }
    Cdata_read(&(Tts->TTS_seq_id),1,BTTS_seq_id,YY,fp);
    Sdata_read(&(Tts->L_code),1,BL_code,YY,fp);
    Cdata_read(&(Tts->G_en),1,BG_en,YY,fp);
    Cdata_read(&(Tts->A_en),1,BA_en,YY,fp);
    Cdata_read(&(Tts->R_en),1,BR_en,YY,fp);
    Cdata_read(&(Tts->P_en),1,BP_en,YY,fp);
    Cdata_read(&(Tts->V_en),1,BV_en,YY,fp);
    Cdata_read(&(Tts->L_en),1,BL_en,YY,fp);
    Cdata_read(&(Tts->T_en),1,BT_en,YY,fp);

    // Discard the unread bits of the current byte.  The previous code did
    // this by reading YY[1] one-bit values through Cdata_read() into a
    // single char, which wrote past that char; those reads never touch the
    // file, so clearing the bit count has the same effect on the stream.
    YY[1]=0;

    return(fp);
}


/* ------ Init_Data() ------ */
//
// Allocates the global sequence header (Tts) and frame (Frame) and marks
// the frame as holding no sentence.  The contents of *Tts are left
// uninitialised; Open_File() fills in the fields it reads.
//
// The process exits with status 1 if either allocation fails, because
// every other function in this file dereferences both pointers
// unconditionally.
//
void Init_Data()
{
    Tts=(TTS_Data *) malloc(sizeof(TTS_Data));
    Frame=(FRAME_Data *) malloc(sizeof(FRAME_Data));
    if(Tts==NULL || Frame==NULL) exit(1);
    Frame->Sentence=NULL;
}


/* ------ InitSliderBar() ------ */
//
// Prepares the position sliders and the transport buttons of the main
// control dialog for the bit stream that has just been read.
//
// nKind_Slider == TEXT_SYNC : the video slider is cleared and the text
//                             slider gets the range 0 .. nS_MAX-1.
// any other value           : the text slider is cleared and the video
//                             slider gets the range 0 .. nS_MAX-1.
//
// The buttons follow Tts->T_en: when it is set, Play, Backward and Forward
// are enabled; otherwise Stop is enabled and Backward/Forward are disabled.
//
// * MPEG4_Init() calls this function.
//

void InitSliderBar(int nKind_Slider)
{
	int bTrick;

	if (nKind_Slider != TEXT_SYNC)
	{
		// moving picture: retire the text slider ...
		ppp->m_slider_position_F.SetPos(0);
		ppp->m_slider_position_F.ClearTics();
		ppp->m_slider_position_F.SetRange(0, 0);
		// ... and arm the video slider
		ppp->m_slider_position.SetRange(0, nS_MAX-1);
		ppp->m_slider_position.SetTicFreq(1);
		ppp->m_slider_position.SetPos(0);
	}
	else
	{
		// text only: retire the video slider ...
		ppp->m_slider_position.SetPos(0);
		ppp->m_slider_position.ClearTics();
		ppp->m_slider_position.SetRange(0, 0);
		// ... and arm the text slider
		ppp->m_slider_position_F.SetRange(0, nS_MAX-1);
		ppp->m_slider_position_F.SetTicFreq(1);
		ppp->m_slider_position_F.SetPos(0);
	}

	// Transport buttons: Backward/Forward simply mirror the T_en flag.
	bTrick = Tts->T_en ? TRUE : FALSE;
	if (bTrick)
		ppp->m_editPlay.EnableWindow( TRUE );
	else
		ppp->m_editStop.EnableWindow( TRUE );
	ppp->m_editBackward.EnableWindow( bTrick );
	ppp->m_editForward.EnableWindow( bTrick );
}


/* ------ Set_start_time() ------ */
//
// Skips the first nPOS_MOV frames of the stream so that the next rd_txt()
// call returns the frame playback should start with, and records the start
// position in v_st (nPOS_MOV * 500).
//
// fp       : stream returned by Open_File().
// nPOS_MOV : number of frames to skip (the slider position).
//
// Skipping stops early once rd_txt() reports that no further frame follows,
// which is how the other read loops in this file treat a 0 return.
//
void Set_start_time(FILE *fp, int nPOS_MOV)
{
    int nSkipped;

    for(nSkipped=0; nSkipped<nPOS_MOV; nSkipped++) {
        int more=rd_txt(fp,Tts,Frame);
        free_Data(Frame);
        if(more==0) break;
    }
    v_st=nPOS_MOV*500;
}


int rd_txt(FILE *fp, TTS_Data *Tts, FRAME_Data *Frame)
{
    int i, TTS_sent=0x012121212;
    short Ntext, Sdur, Nphone, Nlip;

    YY[0]=YY[1]=0;
    Idata_read(&i,1,32,YY,fp);
	if(i!=TTS_sent) return(0);
    Sdata_read(&(Frame->TTS_id),1,BTTS_id,YY,fp);
    Cdata_read(&Frame->Silence,1,BSilence,YY,fp);
    if(Frame->Silence) {
		Sdata_read(&Sdur,1,BSdur,YY,fp);
		Frame->Silence=Sdur; Frame->Sentence=NULL;
    	YY[0]=YY[1]=0;
	}
    else {
        Frame->Silence=0; 
        Frame->Sentence=Snt=(SENTENCE_Data *)malloc(sizeof(SENTENCE_Data));
        Snt->Prosody=NULL; Snt->Video=NULL; Snt->Lip_shape=NULL;

        if(Tts->G_en) {
			Cdata_read(&Snt->Gender,1,BGender,YY,fp);
	    }
		else Snt->Gender = 0;	// default gender is female
        if(Tts->A_en) {
			Cdata_read(&Snt->Age,1,BAge,YY,fp);
	    }
		else Snt->Age = 4;
		if(!Tts->V_en && Tts->R_en) {
			Cdata_read(&Snt->Speech_Rate,1,BSpeech_Rate,YY,fp);
			// Speech Rate Slider Bar Control
			if(pOpt->m_check_default.GetCheck() || !Tts->T_en)
				pOpt->m_slider_SpchRate.SetPos(Snt->Speech_Rate);
		}
		else Snt->Speech_Rate = 3;

		Sdata_read(&Ntext,1,BNtext,YY,fp); Snt->Ntext=Ntext;
		Snt->Text=(char *)malloc(Ntext+2); 
		Cdata_read(Snt->Text,Ntext,BText,YY,fp); Snt->Text[Ntext]=0;

        if(Tts->P_en) { 
			Snt->Prosody=Prosody=(Prosody_Data *)malloc(sizeof(Prosody_Data));
			Cdata_read(&(Prosody->Dur_en),1,BDur_en,YY,fp);
			Cdata_read(&(Prosody->F0_en),1,BF0_en,YY,fp);
			Cdata_read(&(Prosody->En_en),1,BEn_en,YY,fp);
			Sdata_read(&Nphone,1,BNphone,YY,fp); Prosody->Nphone=Nphone;
			Sdata_read(&(Prosody->Sphone),1,BSphone,YY,fp); 
			i=Prosody->Sphone;
			Prosody->Phone=(char *)malloc((i+2));
			Cdata_read(Prosody->Phone,i,BPhone,YY,fp);
			if(Prosody->Dur_en) {
	    		Prosody->Dur=(short *)malloc((Nphone+2)*2); 
	    		Sdata_read(Prosody->Dur,Nphone,BDur,YY,fp);
			}
			else Prosody->Dur=NULL;

			if(Prosody->F0_en) {
	    		Prosody->Ptch=cmatrix(Nphone+2,3);
	    		CMdata_read(Prosody->Ptch,Nphone,3,BPtch,YY,fp);
			}
			else Prosody->Ptch=NULL;

			if(Prosody->En_en) {
	    		Prosody->Energy=cmatrix(Nphone+2,3);
	    		CMdata_read(Prosody->Energy,Nphone,3,BEnergy,YY,fp);
			}
			else Prosody->Energy=NULL;
	    }
        if(Tts->V_en) { 
			Snt->Video=Video=(Video_Data *) malloc(sizeof(Video_Data));
			Sdata_read(&Video->SntDur,1,BSntDur,YY,fp);
			Sdata_read(&Video->PinSnt,1,BPinSnt,YY,fp);
			Sdata_read(&Video->Offset,1,BOffset,YY,fp);
	    }
        if(Tts->L_en) { 
			Snt->Lip_shape=Lip=(Lip_Data *)malloc(sizeof(Lip_Data));
			Sdata_read(&Nlip,1,BNlip,YY,fp); Lip->Nlip=Nlip;
			Lip->LinSnt=(short *)malloc((Nlip+2)*2); 
			Sdata_read(Lip->LinSnt,Nlip,BLinSnt,YY,fp);
			Lip->Lshape=(char *)malloc(Nlip+2);
			Cdata_read(Lip->Lshape,Nlip,BLshape,YY,fp);
	    }
		YY[0]=0; YY[1]=0;
	}
    return(1);
}


/* ------ MPEG4_Init() ------ */
//
// MPEG4-TTS Bit Stream о鿩   encoding Ǿ° 
// Ǵϸ,  쿡  ʿ ʱȭ ۾ ش.
// 
// * ܺ α׷  ̽
//

int MPEG4_Init()
{
    int i, j=0;
	FILE	*fp;

	Init_Data();

    fp=Open_File();
   
    while( (i=rd_txt(fp,Tts,Frame)) != 0 ) {
		j++; free_Data(Frame); 
	}
    v_ed=500*j;	fclose( fp );

	if( Tts->T_en )		nFLAG_TRICK = TRUE;
	else	nFLAG_TRICK = FALSE;

	IsVideo = Tts->V_en;	// ModelessDlg.cpp  

 	MessageBox(NULL, "MPEG-4 TTS Data Decoding O.K", "MPEG4_TTS", MB_OK);

	nS_MAX=0;
	pOpt->m_check_default.SetCheck(1);

	if(Tts->V_en)	// for Moving Picture 
	{ 
		nS_MAX = (int)(v_ed/500);

		// Init VideoSync Bar
		InitSliderBar(1);

		T_Sync[1] = v_ed * 16;
	}
	else		// for Text Only
	{		
		nS_MAX = j;		
		// Init TextSync Bar
		InitSliderBar(0);
	}

	return(nFLAG_TRICK);

}


/* ---------- MPEG4_TTS_Play() ---------- */
//
//	void MPEG4_TTS_Play()
//
//	Decodes the MPEG-4 TTS bit stream frame by frame and feeds each
//	sentence to the synthesizer.
//
//	* Entry point for the rest of the program.
//

// Outputs nSamples zero samples through play(), at most one syn.iwave
// buffer at a time so that a large count cannot overrun the buffer.
// play() is always called at least once, also for a count of zero or less.
// Returns the number of samples output.
// NOTE(review): assumes play() has finished with syn.iwave when it
// returns - confirm in the sound output module.
static int PlaySilence(int nSamples)
{
	const int nCapacity = (int)(sizeof(syn.iwave) / sizeof(syn.iwave[0]));
	int nPlayed = 0;

	if (nSamples < 0) nSamples = 0;
	do
	{
		int k;
		int nChunk = nSamples - nPlayed;

		if (nChunk > nCapacity) nChunk = nCapacity;
		for (k = 0; k < nChunk; k++)
			syn.iwave[k] = 0;
		play(nChunk);
		nPlayed += nChunk;
	} while (nPlayed < nSamples);

	return nPlayed;
}

// Plays the stream from the position selected on the active slider.
//
// Video streams (Tts->V_en): when trick mode is on, playback starts at the
// frame given by the video slider and T_Sync[0] is preset to
// slider position * 500 * 16; otherwise it starts at 0.  Only the first
// sentence may begin part-way (PinSnt != 0); later sentences with
// PinSnt != 0 are skipped.  A sentence with PinSnt == 0 is preceded by
// Offset * 16 samples of silence.
//
// Text streams: sentences whose index is below f_CURPOS_F are skipped; for
// each spoken sentence the text slider is advanced.
//
// Silent frames output 8000 zero samples in either mode.
void MPEG4_TTS_Play()
{
	int fSnt=0;
	int nAdd=0;
	int nPOS_MOV;
	int	nCurPos;
	int	nSentence=0;
	FILE	*fp;


	SoundOutInit();									// SoundIO.c

	fp = Open_File();

	free_Data(Frame);

	if( Tts->V_en )
	{
		nPOS_MOV = ppp->m_slider_position.GetPos();

		if( Tts->T_en )
		{
			// convert the slider position to a sample count
			T_Sync[0] = nPOS_MOV * 500 * 16;
			Set_start_time(fp, nPOS_MOV);
		}
		else
		{
			T_Sync[0] = 0;
		}
	}

	while( rd_txt(fp, Tts, Frame) != 0 ) {
		// A silent frame has no sentence.  Test the pointer, not
		// Frame->Silence: that field holds the narrowed duration and can
		// be 0 for a silent frame.
		if(Frame->Sentence == NULL) {
			T_Sync[0] += PlaySilence(8000);
			Set_Slider_Pos( T_Sync );
			continue;
		}

		// caption
		if (nFILE == TRUE)
			pText->m_edit_Sentence.SetWindowText(Frame->Sentence->Text);

		if(Tts->V_en)
		{
			Video = Frame->Sentence->Video;
			if(Video->PinSnt!=0 && fSnt==1)
			{
				// only the first sentence may start part-way through
				free_Data(Frame);
				continue;
			}
			if(Video->PinSnt==0)
				T_Sync[0] += PlaySilence(Video->Offset*16);
			fSnt=1;

			VSayOneParagraph(Tts, Frame, T_Sync);
		}
		if( Tts->V_en != 1 )
		{
			// user-selected speech rate overrides the transmitted one
			if( !pOpt->m_check_default.GetCheck() && Tts->T_en )
				Frame->Sentence->Speech_Rate = (short)f_SPCHRATE;

			if( (int)f_CURPOS_F <= nSentence++ )
			{
	  			VSayOneParagraph(Tts, Frame, T_Sync);
				// show the position being synthesized on the text slider
				nAdd++;
				nCurPos = (int)f_CURPOS_F+nAdd;
				ppp->m_slider_position_F.SetPos(nCurPos);
			}
		}

		free_Data(Frame);
	}

	fclose( fp );

	SoundOutUninit();								// SoundIo.c
	ppp->m_slider_position.SetPos(0);

}


/* ------ cmatrix() ------ */
//
// Allocates a matrix of chars with row indices 0..nrh and column indices
// 0..nch, i.e. (nrh+1) rows of (nch+1) chars.  The contents are not
// initialised.
//
// Returns the row-pointer array, or NULL if nrh or nch is negative or an
// allocation fails (nothing is leaked in that case).  Release the matrix
// with free_cmatrix(m, nrh, nch) using the same nrh.
//
char **cmatrix(int nrh, int nch)
{
    int i;
    char **m;

    if (nrh < 0 || nch < 0) return NULL;

    m = (char **)malloc(((size_t)nrh + 1) * sizeof(char *));
    if (m == NULL) return NULL;

    for (i = 0; i <= nrh; i++) {
        m[i] = (char *)malloc(((size_t)nch + 1) * sizeof(char));
        if (m[i] == NULL) {
            // undo the rows allocated so far
            while (i-- > 0) free(m[i]);
            free(m);
            return NULL;
        }
    }

    return m;
}

/* ------ free_cmatrix() ------ */
//
// Releases a matrix obtained from cmatrix(): rows nrh..0 and then the
// row-pointer array itself.
//
// m   : matrix to free; NULL is accepted and ignored.
// nrh : highest row index, the same value that was passed to cmatrix().
// nch : unused; kept so the signature mirrors cmatrix().
//
void free_cmatrix(char **m, int nrh, int nch)
{
    int i;

    (void)nch;
    if (m == NULL) return;
    for (i = nrh; i >= 0; i--) free(m[i]);
    free(m);
}

/* ------ Idata_read() ------ */
//
// Reads n values of nbits bits each from the bit stream into A[0..n-1],
// most significant bit first.
//
// A     : receives the values.
// n     : number of values to read.
// nbits : width of each value in bits; a width of 0 or less stores 0.
// B     : reader state, updated on return.  B[0] is the byte currently
//         being consumed and B[1] the number of its low-order bits that
//         have not been consumed yet.
// fp    : stream the bytes are taken from.
//
// A new byte is fetched as soon as a value needs more bits than the
// current byte still holds.  If the stream has no more bytes the previous
// byte is kept; callers recognise the end of the stream by the start code
// no longer matching.
//
void Idata_read(int *A, int n, int nbits, unsigned char *B, FILE *fp)
{
    static const unsigned int lowMask[9] =
        { 0,1,3,7,0x0F,0x01F,0x03F,0x07F,0x0FF };
    unsigned char cur = B[0];
    int avail = B[1];
    int i;

    for (i = 0; i < n; i++)
    {
        unsigned int value = 0;
        const int width = (int)(8 * sizeof value);
        int need = nbits;

        while (need > 0)
        {
            if (need > avail)
            {
                // Take everything left in the current byte.  Nothing is
                // contributed when the byte is already empty; skipping the
                // shift then avoids shifting by the full width of the
                // type, which is undefined (a 32-bit read that starts with
                // an empty byte hit exactly that).
                const int shift = need - avail;
                unsigned char next;

                if (avail > 0 && shift < width)
                    value |= ((unsigned int)cur & lowMask[avail]) << shift;
                need -= avail;
                avail = 8;
                if (fread(&next, 1, 1, fp) == 1)
                    cur = next;
            }
            else
            {
                // The rest of the value sits inside the current byte.
                value |= ((unsigned int)cur >> (avail - need)) & lowMask[need];
                avail -= need;
                need = 0;
            }
        }
        A[i] = (int)value;
    }
    B[0] = cur; B[1] = (unsigned char)avail;
}


/* ------ Sdata_read() ------ */
//
// Reads n values of nbits bits each from the bit stream into the short
// array A, most significant bit first.
//
// B is the reader state, updated on return: B[0] is the byte currently
// being consumed and B[1] the number of its low-order bits still unread.
// A new byte is fetched from fp whenever a value needs more bits than the
// current byte still holds.
//
void Sdata_read(short *A, int n, int nbits, unsigned char *B, FILE *fp)
{
    static const unsigned int lowBits[9] =
        { 0,1,3,7,0x0F,0x01F,0x03F,0x07F,0x0FF };
    unsigned char byte = B[0];
    int left = B[1];
    int idx;

    for (idx = 0; idx < n; ++idx)
    {
        unsigned int acc = 0;
        int want = nbits;

        while (want != 0)
        {
            if (want <= left)
            {
                // the remaining bits all come from the current byte
                acc |= ((unsigned int)byte >> (left - want)) & lowBits[want];
                left -= want;
                want = 0;
            }
            else
            {
                // use up the current byte, then fetch the next one
                acc |= ((unsigned int)byte & lowBits[left]) << (want - left);
                want -= left;
                left = 8;
                fread(&byte, 1, 1, fp);
            }
        }
        A[idx] = (short)acc;
    }

    B[0] = byte;
    B[1] = (unsigned char)left;
}

/* ------ Cdata_read() ------ */
//
// Reads n values of nbits bits each from the bit stream into the char
// array A, most significant bit first.
//
// B is the reader state, updated on return: B[0] is the byte currently
// being consumed and B[1] the number of its low-order bits still unread.
// A new byte is fetched from fp whenever a value needs more bits than the
// current byte still holds.
//
void Cdata_read(char *A, int n, int nbits, unsigned char *B, FILE *fp)
{
    static const unsigned int lowBits[9] =
        { 0,1,3,7,0x0F,0x01F,0x03F,0x07F,0x0FF };
    unsigned char byte = B[0];
    int left = B[1];
    int idx;

    for (idx = 0; idx < n; ++idx)
    {
        unsigned int acc = 0;
        int want = nbits;

        while (want != 0)
        {
            if (want <= left)
            {
                // the remaining bits all come from the current byte
                acc |= ((unsigned int)byte >> (left - want)) & lowBits[want];
                left -= want;
                want = 0;
            }
            else
            {
                // use up the current byte, then fetch the next one
                acc |= ((unsigned int)byte & lowBits[left]) << (want - left);
                want -= left;
                left = 8;
                fread(&byte, 1, 1, fp);
            }
        }
        A[idx] = (char)acc;
    }

    B[0] = byte;
    B[1] = (unsigned char)left;
}


/* ------ CMdata_read() ------ */
//
// Reads n1 * n2 values of nbits bits each from the bit stream into the
// char matrix A, row by row (A[0][0..n2-1], A[1][0..n2-1], ...), most
// significant bit first.
//
// B is the reader state, updated on return: B[0] is the byte currently
// being consumed and B[1] the number of its low-order bits still unread.
// A new byte is fetched from fp whenever a value needs more bits than the
// current byte still holds.
//
void CMdata_read(char **A, int n1, int n2, int nbits, unsigned char *B, FILE *fp)
{
    static const unsigned int lowBits[9] =
        { 0,1,3,7,0x0F,0x01F,0x03F,0x07F,0x0FF };
    unsigned char byte = B[0];
    int left = B[1];
    int row, col;

    for (row = 0; row < n1; ++row)
    {
        char *dst = A[row];

        for (col = 0; col < n2; ++col)
        {
            unsigned int acc = 0;
            int want = nbits;

            while (want != 0)
            {
                if (want <= left)
                {
                    // the remaining bits all come from the current byte
                    acc |= ((unsigned int)byte >> (left - want)) & lowBits[want];
                    left -= want;
                    want = 0;
                }
                else
                {
                    // use up the current byte, then fetch the next one
                    acc |= ((unsigned int)byte & lowBits[left]) << (want - left);
                    want -= left;
                    left = 8;
                    fread(&byte, 1, 1, fp);
                }
            }
            dst[col] = (char)acc;
        }
    }

    // With no rows the state is written back unchanged, so storing it once
    // here leaves B exactly as a per-row store would.
    B[0] = byte;
    B[1] = (unsigned char)left;
}



/* ------ free_Data() ------ */
//
// Releases everything rd_txt() allocated below Frame->Sentence (text,
// prosody arrays, video and lip data, then the sentence itself) and
// resets Frame->Sentence to NULL.  Does nothing when the frame holds no
// sentence.  Frame itself is not freed.
//
void free_Data(FRAME_Data *Frame)
{
    SENTENCE_Data *sentence = Frame->Sentence;

    if (sentence == NULL)
        return;

    // free() accepts NULL, so plain buffers need no guard of their own.
    free(sentence->Text);

    if (sentence->Prosody != NULL)
    {
        // the matrices were created with cmatrix(Nphone+2, 3)
        const int topRow = sentence->Prosody->Nphone + 2;

        free(sentence->Prosody->Phone);
        free(sentence->Prosody->Dur);
        if (sentence->Prosody->Ptch != NULL)
            free_cmatrix(sentence->Prosody->Ptch, topRow, 3);
        if (sentence->Prosody->Energy != NULL)
            free_cmatrix(sentence->Prosody->Energy, topRow, 3);
        free(sentence->Prosody);
    }

    free(sentence->Video);

    if (sentence->Lip_shape != NULL)
    {
        free(sentence->Lip_shape->LinSnt);
        free(sentence->Lip_shape->Lshape);
        free(sentence->Lip_shape);
    }

    free(sentence);
    Frame->Sentence = NULL;
}


/* ------ ChangeLip() ------ */
//
// Shows the lip shape that belongs to the current playback position of
// the sentence in *Frame (moving-picture mode).
//
// The position inside the sentence is (T_Sync[0] - T_Sync[2]) / 16.  Lip
// shape j is shown while that position lies in
// [LinSnt[j], LinSnt[j+1]); the last shape has no upper bound, because
// only Nlip entries of LinSnt are read from the stream.
//
// Returns 0 when the frame carries no lip data (no sentence, no lip
// block, or Nlip == 0) and 1 otherwise, whether or not a shape matched.
//
// * mk_fmt() calls this function.
//

int ChangeLip( FRAME_Data *Frame )
{
	int	nPos, j;
	int Nlip;
	short	*LinSnt;
	char	*Lshape;

	if( Frame == NULL || Frame->Sentence == NULL
		|| Frame->Sentence->Lip_shape == NULL )
		return(0);

	Nlip = Frame->Sentence->Lip_shape->Nlip;
	LinSnt = Frame->Sentence->Lip_shape->LinSnt;
	Lshape = Frame->Sentence->Lip_shape->Lshape;

	if( Nlip == 0 )	return(0);
	if( LinSnt == NULL || Lshape == NULL )	return(0);

	nPos = (T_Sync[0] - T_Sync[2]) / 16;
	for(j = 0; j < Nlip; j++ )
	{
		// LinSnt[Nlip] is never filled in, so the last interval is open
		// ended instead of being compared against that slot.
		if( nPos >= LinSnt[j] && ( j == Nlip-1 || nPos < LinSnt[j+1] ) )
		{
				pCLip->ChangeBitmap((int)Lshape[j]);
				break;
		}
	}

	return(1);

}


/* ------ Set_Slider_Pos() ------ */
//
// Moves the video slider of the main dialog to the position
// T_Sync[0] / T_Sync[1] * nS_MAX, i.e. the played fraction of the total
// time scaled to the slider range.
//
// T_Sync : timing triple; [0] is the current time and [1] the total time.
//
// The slider is left untouched when the total time is not positive.  That
// is the case for text-only streams, where T_Sync[1] is never set, and the
// division would otherwise be by zero.
//
void Set_Slider_Pos(int *T_Sync)
{
	float	f_Curpos;

	if( T_Sync[1] <= 0 )	return;

	f_Curpos = (float)T_Sync[0] / T_Sync[1] * nS_MAX;
	ppp->m_slider_position.SetPos((int)f_Curpos);
}


/* ------ ReplaceBitmap() ------ */
//
// Forwards the lip-shape number to the lip-sync dialog, which switches
// its bitmap accordingly.
//
void ReplaceBitmap(int nLipNum)
{
	const int nShape = nLipNum;

	(*pCLip).ChangeBitmap(nShape);
}


/* ------ Check_LipSync() ------ */
//
// Returns the check state of the "sync on" radio button of the option
// dialog, exactly as GetCheck() reports it.
//
int	Check_LipSync()
{
	const int nChecked = pOpt->m_radio_SyncOn.GetCheck();

	return nChecked;
}